% Creates Summary Statistics and Histogram of T_i for women

% Start from an empty workspace and put the project helper folders on the path.
clear 
addpath('Routines')
addpath('Powell')

% Log and result names are derived from this script's own file name.
logfile_name=[mfilename '.log'];
result_name=[mfilename '_results'];

% Start each run with a fresh log.  A diary left open by an earlier aborted
% run is closed first, and the delete is skipped when there is no old log so
% that the first run does not emit a "file not found" warning.
diary off
if exist(logfile_name,'file')==2
    delete(logfile_name);
end
diary(logfile_name);


% Expected to provide at least the variables dat and varnames used below.
load('NLSY.mat')

% Record summary statistics of the full (unfiltered) sample in the log.
diary on
summstats(dat,varnames)
diary off

% Sample selection: keep only the rows whose 'female' column equals 1.
isWoman=(getvariables(dat,varnames,{'female'})==1);
dat=dat(isWoman,:);
n=size(dat,1);

% Pull the columns again from the filtered data so that every vector below
% has n rows (getvariables is a project helper; presumably it returns the
% named columns of dat -- confirm against Routines).
female=getvariables(dat,varnames,{'female'});
year=getvariables(dat,varnames,{'year'});
ID=getvariables(dat,varnames,{'ID'});

% Time-varying regressors.
x1names={'children' 'married' 'sp_inc4'};
x1=getvariables(dat,varnames,x1names);

% One indicator column per calendar year from T1 up to T2-1; the last year
% has no dummy of its own.
T1=min(year);
T2=max(year);
yr_names={};
tdummies=zeros(n,T2-T1);
col=0;
for t=T1:T2-1
    col=col+1;
    yr_names{end+1}=['yr' num2str(t)];
    tdummies(:,col)=(year==t);
end
summstats(tdummies,yr_names)

% Outcome variable.
yname={'empl'};
y=getvariables(dat,varnames,yname);


% create nn
% data for individual iobs is in rows nn(iobs,1) to nn(iobs,2)
%
% A new individual starts at row 1 and at every row whose ID differs from
% the ID in the row above.  Each block ends on the row before the next
% block starts; the last block ends at row n.

IDs=unique(ID);
nobs=length(IDs);

idcol=ID(:);
firstRow=[1; find(idcol(2:end)~=idcol(1:end-1))+1];
lastRow=[firstRow(2:end)-1; n];
nn=[firstRow lastRow];
iobs=size(nn,1);

% If the number of contiguous ID blocks differs from the number of distinct
% IDs, some individual's rows are not stored together.
if iobs~=nobs
    fprintf('something is wrong with panel structure\n')
end

% Lagged outcome: y_lag(i) is y from the row directly above, but only when
% that row belongs to the same ID and is exactly one year earlier.
% Everything else (including row 1) stays NaN.
y_lag=NaN(n,1);
prevRow=1:n-1;
thisRow=2:n;
hasLag=(ID(thisRow)==ID(prevRow)) & (year(thisRow)==(year(prevRow)+1));
y_lag(thisRow(hasLag))=y(prevRow(hasLag));




% Inputs for a pooled logit that ignores fixed effects: constant, lagged
% outcome, the x1 regressors, and all year dummies except the first one.
xxnames=[{'const.' 'lagged y'}, x1names, yr_names(2:end)];
xx=[ones(n,1) y_lag x1 tdummies(:,2:end)];

% Keep only rows where both the outcome and its lag are observed.
keep=~isnan(y_lag) & ~isnan(y);
yy=y(keep);
xx=xx(keep,:);
IIDD=ID(keep);

% Distinct individuals remaining after the filter.
IIDDs=unique(IIDD);
nobs1=numel(IIDDs);

% Histogram of T_i, the number of retained rows per individual.
% counts(k) is the number of individuals that have exactly k retained rows.
counts=zeros(20,1);
if ~isempty(IIDD)
    % person maps every retained row to the position of its ID among the
    % distinct IDs; accumulating ones over it gives T_i in a single pass.
    [uniqIDs,firstIdx,person]=unique(IIDD);
    T_i=accumarray(person(:),1);
    % Keep at least 20 bins as before, growing only if some T_i is larger.
    counts=accumarray(T_i,1,[max(20,max(T_i)) 1]);
end

% Show at least the first 13 bins, and more when some individual has more
% than 13 rows, so that no one is silently left out of the figure.
nbars=max([13; find(counts,1,'last')]);
bar(counts(1:nbars))
saveas(gcf,'HistoFemales.png')